library(seqinr)
library(stringr)
library(dplyr)
library(epiDisplay)
library(gmodels)
library(ggplot2)
# Load the per-pipeline GSB site tables (TPP, MQ, PD) and keep only the
# columns needed for the cross-pipeline comparison.
# The input directory is defined once so it can be changed in a single place.
gsb_dir <- "D:/Pipeline comparisons/Writing/Data/GSB/Rice"

TPP_GSB <- read.csv(file.path(gsb_dir, "TPP_GSB.csv"))
MQ_GSB <- read.csv(file.path(gsb_dir, "MQ_GSB.csv"))
PD_GSB <- read.csv(file.path(gsb_dir, "PD_GSB.csv"))

# The MQ table gets its PROTEIN_LOC key built here as "<protein>_<position>";
# the TPP and PD tables are used with the PROTEIN_LOC column they already have.
MQ_GSB$PROTEIN_LOC <- paste0(MQ_GSB$Single_Protein, "_", MQ_GSB$PROTEIN_POS_NUM)

# dplyr::select is namespaced explicitly, as in the original script.
R_TPP_GSB <- dplyr::select(TPP_GSB, c("PROTEIN_LOC", "cat", "Amino"))
R_MQ_GSB <- dplyr::select(MQ_GSB, c("PROTEIN_LOC", "cat", "Amino"))
R_PD_GSB <- dplyr::select(PD_GSB, c("PROTEIN_LOC", "cat", "Amino"))
# Prefix the category and amino-acid columns with the pipeline name so the
# three tables can be merged on PROTEIN_LOC without column-name clashes.
R_TPP_GSB <- dplyr::rename(R_TPP_GSB, TPP_cat = cat, TPP_amino = Amino)
R_MQ_GSB <- dplyr::rename(R_MQ_GSB, MQ_cat = cat, MQ_amino = Amino)
R_PD_GSB <- dplyr::rename(R_PD_GSB, PD_cat = cat, PD_amino = Amino)
# Full outer join of the three pipeline tables on PROTEIN_LOC: a site that is
# absent from a pipeline gets NA in that pipeline's columns.
merged_TPP_PD <- merge(
  x = R_TPP_GSB,
  y = R_PD_GSB,
  by = "PROTEIN_LOC",
  all.x = TRUE,
  all.y = TRUE
)
All_merged <- merge(
  x = merged_TPP_PD,
  y = R_MQ_GSB,
  by = "PROTEIN_LOC",
  all.x = TRUE,
  all.y = TRUE
)
# Count, per site, how many pipelines have a non-missing category: overall
# (`Number of matches`) and for each pair of pipelines. A pair is flagged "Y"
# when both pipelines have a category for the site, otherwise "N".
All_merged <- dplyr::mutate(
  All_merged,
  `Number of matches` = rowSums(!is.na(cbind(TPP_cat, MQ_cat, PD_cat))),
  TPPvsMQ = rowSums(!is.na(cbind(TPP_cat, MQ_cat))),
  TPPvsPD = rowSums(!is.na(cbind(TPP_cat, PD_cat))),
  MQvsPD = rowSums(!is.na(cbind(MQ_cat, PD_cat))),
  `TPP vs MQ` = ifelse(TPPvsMQ == 2, "Y", "N"),
  `TPP vs PD` = ifelse(TPPvsPD == 2, "Y", "N"),
  `MQ vs PD` = ifelse(MQvsPD == 2, "Y", "N")
)
# Map a category label to a numeric score: "Bronze" -> 1, "Silver" -> 2,
# any other non-missing label -> 3. Missing labels stay NA.
cat_to_score <- function(category) {
  dplyr::case_when(
    is.na(category) ~ NA_real_,
    category == "Bronze" ~ 1,
    category == "Silver" ~ 2,
    TRUE ~ 3
  )
}

All_merged$TPP_cat_num <- cat_to_score(All_merged$TPP_cat)
All_merged$MQ_cat_num <- cat_to_score(All_merged$MQ_cat)
All_merged$PD_cat_num <- cat_to_score(All_merged$PD_cat)

# Per-site mean score over the pipelines that reported the site
# (NA scores are ignored).
All_merged$cat_num <- rowMeans(
  All_merged[c("TPP_cat_num", "MQ_cat_num", "PD_cat_num")],
  na.rm = TRUE
)
# Mean category score grouped by the number of pipelines reporting the site.
boxplot(All_merged$cat_num ~ All_merged$`Number of matches`)

# Bar chart of how many sites fall into each "number of matches" group, with
# the count printed inside each bar. The x-axis title is left blank; the
# original set a title with labs() and then immediately overrode it with
# xlab(""), so only the blank title ever took effect.
ggplot(
  All_merged,
  aes(
    x = as.factor(`Number of matches`),
    fill = as.factor(`Number of matches`)
  )
) +
  geom_bar() +
  scale_fill_hue(c = 40) +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, size = 6) +
  xlab("") +
  theme(legend.position = "none", text = element_text(size = 24))

# One-way tabulations of the pairwise overlap flags ("Y"/"N").
tab1(All_merged$`TPP vs MQ`)
tab1(All_merged$`TPP vs PD`)
tab1(All_merged$`MQ vs PD`)

# The data viewer is only meaningful in an interactive session; skip it when
# the script is run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(All_merged)
}
# Summary table of sites shared by each pair of pipelines.
# NOTE(review): the Common_sites values are hard-coded; presumably they were
# transcribed from the tab1() output for the pairwise flags -- confirm, and
# consider computing them from All_merged so they cannot go stale.
df2 <- data.frame(
  Comparison = c("TPP vs MQ", "TPP vs PD", "MQ vs PD"),
  Level = c("TPP vs MQ", "TPP vs PD", "MQ vs PD"),
  Common_sites = c(8018, 5555, 9943)
)

# Quick bar chart with default ordering of the comparisons.
ggplot(data = df2, aes(x = Comparison, y = Common_sites, fill = Comparison)) +
  geom_bar(stat = "identity")

# Same chart with the bar order fixed through explicit factor levels, no
# legend, a blank x-axis title, and larger text.
p <- df2 %>%
  dplyr::arrange(Common_sites) %>%
  mutate(
    Comparison = factor(
      Comparison,
      levels = c("TPP vs MQ", "TPP vs PD", "MQ vs PD")
    )
  ) %>%
  ggplot(aes(x = Comparison, y = Common_sites, fill = Comparison)) +
  scale_fill_hue(c = 90) +
  geom_bar(stat = "identity") +
  theme(text = element_text(size = 18), legend.position = "none") +
  xlab("")

# Print the count near the top of each bar.
p + geom_text(
  aes(label = Common_sites),
  position = position_stack(vjust = 0.9),
  size = 6
)

# The data viewer is only meaningful in an interactive session; skip it when
# the script is run non-interactively (e.g. via Rscript).
if (interactive()) {
  View(All_merged)
}
# Bar chart of the number of sites per "number of matches" group, with the
# count printed inside each bar. The labs() title is superseded by the
# trailing xlab(""), so the x-axis title is rendered blank.
ggplot(
  data = All_merged,
  mapping = aes(
    x = as.factor(`Number of matches`),
    fill = as.factor(`Number of matches`)
  )
) +
  geom_bar() +
  scale_fill_hue(c = 40) +
  labs(x = "Number of matches across pipelines") +
  geom_text(
    mapping = aes(label = ..count..),
    stat = "count",
    vjust = 1.5,
    size = 6
  ) +
  theme(legend.position = "none", text = element_text(size = 24)) +
  xlab("")
# ggplot2 boxplot of the mean category score per "number of matches" group.
# The original lines passed aes() to base boxplot() and added geom_boxplot()
# to its result, which is not valid: base graphics and ggplot2 layers cannot
# be combined. The grouping (score by number of matches) follows the
# base-graphics boxplots used elsewhere in this script.
ggplot(
  All_merged,
  aes(x = as.factor(`Number of matches`), y = cat_num)
) +
  geom_boxplot()
# Base-graphics boxplots of mean category score by number of matches.
# Axis titles are passed straight to boxplot(): calling title(xlab=, ylab=)
# afterwards draws on top of the titles boxplot() already generated from the
# formula terms, producing overlapping text.
boxplot(
  All_merged$cat_num ~ All_merged$`Number of matches`,
  xlab = "Number of matches", ylab = "Average nominal score"
)

# Same plot without axes or frame.
boxplot(
  All_merged$cat_num ~ All_merged$`Number of matches`,
  axes = FALSE,
  xlab = "Number of matches", ylab = "Average nominal score"
)
boxplot(All_merged$cat_num ~ All_merged$`Number of matches`, axes = FALSE)

# Variant with the axis titles suppressed. The original tried labels=FALSE,
# x.labels=FALSE and lab=FALSE, none of which is a boxplot() argument;
# presumably the goal was to hide the auto-generated titles -- confirm.
boxplot(
  All_merged$cat_num ~ All_merged$`Number of matches`,
  xlab = "", ylab = ""
)

# Formula + data form: default titles come from the column names, then the
# same plot with explicit titles.
boxplot(cat_num ~ `Number of matches`, data = All_merged)
boxplot(
  cat_num ~ `Number of matches`,
  data = All_merged,
  xlab = "Number of matches", ylab = "Average nominal score"
)
# Four renderings of the "number of matches" bar chart that differ only in
# the vertical offset of the count labels and in the x-axis title.
# The layers shared by all four are built once; each variant then adds its
# labs() title, the count labels, and the final xlab() (which supersedes the
# labs() title).
match_bars <- ggplot(
  All_merged,
  aes(
    x = as.factor(`Number of matches`),
    fill = as.factor(`Number of matches`)
  )
) +
  geom_bar() +
  scale_fill_hue(c = 40) +
  theme(legend.position = "none", text = element_text(size = 24))

# Variant 1: counts at vjust 1.5, blank x-axis title.
match_bars +
  labs(x = "Number of matches across pipelines") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, size = 6) +
  xlab("")

# Variant 2: as variant 1, with a shorter (still superseded) labs() title.
match_bars +
  labs(x = "Number of matches") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, size = 6) +
  xlab("")

# Variant 3: counts at vjust 1, blank x-axis title.
match_bars +
  labs(x = "Number of matches") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1, size = 6) +
  xlab("")

# Variant 4: counts at vjust 1, x-axis title shown.
match_bars +
  labs(x = "Number of matches") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1, size = 6) +
  xlab("Number of matches")
# Redraw the labelled boxplot at three text magnifications (1.5, 2, 1.8),
# applied to both the axis titles and the tick labels. par() changes persist
# on the graphics device, so the last setting (1.8) stays in effect afterwards.
for (text_scale in c(1.5, 2, 1.8)) {
  par(cex.lab = text_scale, cex.axis = text_scale)
  boxplot(
    cat_num ~ `Number of matches`,
    data = All_merged,
    xlab = "Number of matches",
    ylab = "Average nominal score"
  )
}
